installing required packages and library

# install required packages/library
install.packages("matlib")
## Installing package into '/home/meutsabdahal/R/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
install.packages("rsample")
## Installing package into '/home/meutsabdahal/R/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
install.packages("ggplot2")
## Installing package into '/home/meutsabdahal/R/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
install.packages("corrplot")
## Installing package into '/home/meutsabdahal/R/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
install.packages("MASS")
## Installing package into '/home/meutsabdahal/R/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
## Warning: package 'MASS' is not available for this version of R
## 'MASS' version 7.3-65 is in the repositories but depends on R (>= 4.4.0)
## 'MASS' version 7.3-65 is in the repositories but depends on R (>= 4.6)
## 
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
install.packages("gridExtra")
## Installing package into '/home/meutsabdahal/R/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
install.packages("dplyr")
## Installing package into '/home/meutsabdahal/R/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
install.packages("knitr")
## Installing package into '/home/meutsabdahal/R/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)

import required packages/library

library(matlib)     # for matrix operations
library(ggplot2)    # for data visualization
library(rsample)    # for data splitting
library(corrplot)   # for correlation visualization
## corrplot 0.95 loaded
library(MASS)       # for statistical functions
library(gridExtra)  # for arranging plots
library(dplyr)      # for data manipulation
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr)      # for tables

load features dataset

features<-as.matrix(read.csv(file="data/features.csv",header=F))
colnames(features)<-c("x1", "x3", "x4", "x5")
head(features)
##         x1    x3      x4    x5
## [1,]  8.34 40.77 1010.84 90.01
## [2,] 23.64 58.49 1011.40 74.20
## [3,] 29.74 56.90 1007.15 41.91
## [4,] 19.07 49.69 1007.22 76.79
## [5,] 11.80 40.66 1017.13 97.20
## [6,] 13.97 39.16 1016.05 84.60

load target dataset

target<-as.matrix(read.csv(file="data/target.csv",header=F))
colnames(target)<-c("X2")
head(target)
##          X2
## [1,] 480.48
## [2,] 445.75
## [3,] 438.76
## [4,] 453.09
## [5,] 464.43
## [6,] 470.96

load time series dataset

time<-as.matrix((read.csv(file="data/timeseries.csv",header=F)))
colnames(time)<-c("T1")
head(time)
##      T1
## [1,]  1
## [2,]  2
## [3,]  3
## [4,]  4
## [5,]  5
## [6,]  6

Task 1: Preliminary Data Analysis

time series of input signals

features.ts<-ts(features,start=c(min(time),max(time)),frequency=1)
plot(features.ts,main="Time Series - Input Signal",xlab="Time",ylab="Input Signal")

time series of output signal

target.ts<-ts(target,start=c(min(time),max(time)),frequency=1)
plot(target.ts,main="Time Series - Output Signal",xlab="Time",ylab="Output Signal")

Task 1.2: Distribution of Each Signal

# histogram for temperature (x1)
density_x1<-density(features[,"x1"])
hist(features[,"x1"],freq=FALSE, main="Distribution of Temperature (x1)", xlab="Ambient temperature (°C)")
lines(density_x1, lwd=2)

# histogram for ambient pressure (x3)
density_x3<-density(features[,"x3"])
hist(features[,"x3"],freq=FALSE, main="Distribution of Ambient Pressure (x3)", xlab="Atmospheric pressure (millibar)")
lines(density_x3, lwd=2)

# histogram for relative humidity (x4)
density_x4<-density(features[,"x4"])
hist(features[,"x4"],freq=FALSE, main="Distribution of Relative Humidity (x4)", xlab="Humidity level (%)")
lines(density_x4, lwd=2)

# histogram for exhaust vacuum (x5)
density_x5<-density(features[,"x5"])
hist(features[,"x5"],freq=FALSE, main="Distribution of Exhaust Vacuum (x5)", xlab="Vacuum collected from the steam turbine (cm Hg)")
lines(density_x5, lwd=2)

# histogram for net hourly electrical energy output (X2)
density_x2<-density(target[,"X2"])
hist(target[,"X2"],freq=FALSE, main="Distribution of Net hourly electrical energy output (X2)", xlab="Net hourly electrical energy output in MW")
lines(density_x2, lwd=2)

Task 1.3: Correlation and Scatter Plots

# correlation
combined <- cbind(features, target)
correlation<-cor(combined)
print(correlation)
##            x1         x3          x4          x5         X2
## x1  1.0000000  0.8441067 -0.50754934 -0.54253465 -0.9481285
## x3  0.8441067  1.0000000 -0.41350216 -0.31218728 -0.8697803
## x4 -0.5075493 -0.4135022  1.00000000  0.09957432  0.5184290
## x5 -0.5425347 -0.3121873  0.09957432  1.00000000  0.3897941
## X2 -0.9481285 -0.8697803  0.51842903  0.38979410  1.0000000
corrplot(correlation)

scatter plot of x1 and X2

plot(features[,"x1"], 
     target[,"X2"], 
     main="Temperature vs Net Hourly Electrical Energy Output", 
     xlab="Temperature (°C)", 
     ylab="Net Hourly Electrical Energy Output (MW)")

scatter plot of x3 and X2

plot(features[,"x3"], 
     target[,"X2"], 
     main="Ambient Pressure vs Net Hourly Electrical Energy Output", 
     xlab="Ambient Pressure (millibar)", 
     ylab="Net Hourly Electrical Energy Output (MW)")

scatter plot of x4 and X2

plot(features[,"x4"], 
     target[,"X2"], 
     main="Relative Humidity vs Net Hourly Electrical Energy Output", 
     xlab="Relative Humidity (%)", 
     ylab="Net Hourly Electrical Energy Output (MW)")

scatter plot of x5 and X2

plot(features[,"x5"], 
     target[,"X2"], 
     main="Exhaust Vacuum vs Net Hourly Electrical Energy Output", 
     xlab="Exhaust Vacuum (cm Hg)", 
     ylab="Net Hourly Electrical Energy Output (MW)")

Task 2: Regression - Modelling the Between the Gene Expression

# candidate nonlinear polynomial regression models
model1<-function(X, theta){
  theta[1]*X$x4 + theta[2]*X$x3^2 + theta[3]
}

model2<-function(X, theta){
  theta[1]*X$x4 + theta[2]*X$x3^2 + theta[3]*X$x5 + theta[4]
}

model3<-function(X, theta){
  theta[1]*X$x3 + theta[2]*X$x4 + theta[3]*X$x5^3
}

model4<-function(X, theta){
  theta[1]*X$x4 + theta[2]*X$x3^2 + theta[3]*X$x5^3 + theta[4]
}

model5<-function(X, theta){
  theta[1]*X$x4 + theta[2]*X$x1^2 + theta[3]*X$x3^2 + theta[4]
}

Task 2.1: estimation of model parameters (theta) for every candidate model using least square

# convert matrix into dataframe
X<-as.data.frame(features)

theta_ridge<-function(X, y, lambda=1e-1){
  X_bias<-cbind(1, as.matrix(X))
  XtX <- t(X_bias) %*% X_bias  # compute X^T * X
  XtX_reg <- XtX + lambda * diag(c(0, rep(1, ncol(X)))) # regularization term 
  theta <- ginv(XtX_reg) %*% t(X_bias) %*% y # generalized inverse to solve
  return(theta)
}
# theta for model 1
X_model1 <- data.frame(x4 = X$x4, x3 = X$x3^2)
theta_model1 <- theta_ridge(X_model1, target)
print("Theta Model 1:")
## [1] "Theta Model 1:"
print(theta_model1)
##                 X2
## [1,]  0.0004686643
## [2,]  0.4775167121
## [3,] -0.0094776540
# theta for model 2
X_model2 <- data.frame(x4 = X$x4, x3 = X$x3^2, x5 = X$x5)
theta_model2 <- theta_ridge(X_model2, target)
print("Theta Model 2:")
## [1] "Theta Model 2:"
print(theta_model2)
##                 X2
## [1,]  0.0004582797
## [2,]  0.4636853830
## [3,] -0.0089687414
## [4,]  0.1695904901
X_model3 <- data.frame(x3 = X$x3, x4 = X$x4, x5 = X$x5^3)  
theta_model3 <- theta_ridge(X_model3, target)
print("Theta for Model 3:")
## [1] "Theta for Model 3:"
print(theta_model3)
##                X2
## [1,] 4.295980e-04
## [2,] 2.652240e-02
## [3,] 4.349701e-01
## [4,] 2.778559e-05
X_model4 <- data.frame(x4 = X$x4, x3 = X$x3^2, x5 = X$x5^3) 
theta_model4 <- theta_ridge(X_model4, target)
print("Theta for Model 4:")
## [1] "Theta for Model 4:"
print(theta_model4)
##                 X2
## [1,]  4.621424e-04
## [2,]  4.713285e-01
## [3,] -8.968820e-03
## [4,]  1.066722e-05
X_model5 <- data.frame(x4 = X$x4, x1 = X$x1^2, x3 = X$x3^2)
theta_model5 <- theta_ridge(X_model5, target)
print("Theta for Model 5:")
## [1] "Theta for Model 5:"
print(theta_model5)
##                X2
## [1,]  0.000465739
## [2,]  0.474420584
## [3,] -0.033921625
## [4,] -0.003655083

Task 2.2: Model Residual Sum of Squared Error (RSS)

compute_rss<-function(X,y,theta,model){
  y_pred<-model(X, theta) 
  rss<-sum((y-y_pred)^2)
  return(rss)
}
# RSS for model1
rss_model1 <- compute_rss(X_model1, target, theta_model1, model1)
print("Model 1 RSS:")
## [1] "Model 1 RSS:"
print(rss_model1)
## [1] 5.026192e+17
rss_model2<-compute_rss(X_model2, target, theta_model2, model2)
print("Model 2 RSS:")
## [1] "Model 2 RSS:"
print(rss_model2)
## [1] 4.739227e+17
rss_model3<-compute_rss(X_model3, target, theta_model3, model3)
print("Model 3 RSS:")
## [1] "Model 3 RSS:"
print(rss_model3)
## [1] 1.191366e+38
rss_model4<-compute_rss(X_model4, target, theta_model4, model4)
print("Model 4 RSS:")
## [1] "Model 4 RSS:"
print(rss_model4)
## [1] 5.065207e+34
rss_model5<-compute_rss(X_model5,target,theta_model5,model5)
print("Model 5 RSS:")
## [1] "Model 5 RSS:"
print(rss_model5)
## [1] 1.204208e+15

Log-likelihood function

compute_loglikelihood_variance<-function(rss, n){
  sigma_squared <- rss/(n-1)
  log_likelihood<- -(n/2)*log(2*pi)-(n/2)*log(sigma_squared)-(1/(2*sigma_squared))*rss
  return(list(log_likelihood=log_likelihood,variance=sigma_squared))
}
# log likelihood and variance model1
compute_model1<-compute_loglikelihood_variance(rss_model1, length(target))
model1_log_likelihood<-compute_model1$log_likelihood
model1_variance<-compute_model1$variance
print("Model 1 Log-Likelihood:")
## [1] "Model 1 Log-Likelihood:"
print(model1_log_likelihood)
## [1] -164714.6
print("Model 1 Variance:")
## [1] "Model 1 Variance:"
print(model1_variance)
## [1] 5.253676e+13
# log likelihood and variance model2
compute_model2<-compute_loglikelihood_variance(rss_model2, length(target))
model2_log_likelihood<-compute_model2$log_likelihood
model2_variance<-compute_model2$variance
print("Model 2 Log-Likelihood:")
## [1] "Model 2 Log-Likelihood:"
print(model2_log_likelihood)
## [1] -164433.3
print("Model 2 Variance:")
## [1] "Model 2 Variance:"
print(model2_variance)
## [1] 4.953723e+13
# log likelihood and variance model3
compute_model3<-compute_loglikelihood_variance(rss_model3, length(target))
model3_log_likelihood<-compute_model3$log_likelihood
model3_variance<-compute_model3$variance
print("Model 3 Log-Likelihood:")
## [1] "Model 3 Log-Likelihood:"
print(model3_log_likelihood)
## [1] -389154.6
print("Model 3 Variance:")
## [1] "Model 3 Variance:"
print(model3_variance)
## [1] 1.245287e+34
# log likelihood and variance model4
compute_model4<-compute_loglikelihood_variance(rss_model4, length(target))
model4_log_likelihood<-compute_model4$log_likelihood
model4_variance<-compute_model4$variance
print("Model 4 Log-Likelihood:")
## [1] "Model 4 Log-Likelihood:"
print(model4_log_likelihood)
## [1] -352016.2
print("Model 4 Variance:")
## [1] "Model 4 Variance:"
print(model4_variance)
## [1] 5.294457e+30
# log likelihood and variance model5
compute_model5<-compute_loglikelihood_variance(rss_model5, length(target))
model5_log_likelihood<-compute_model5$log_likelihood
model5_variance<-compute_model5$variance
print("Model 5 Log-Likelihood:")
## [1] "Model 5 Log-Likelihood:"
print(model5_log_likelihood)
## [1] -135847.9
print("Model 5 Variance")
## [1] "Model 5 Variance"
print(model5_variance)
## [1] 1.25871e+11

Compute AIC and BIC

# calculate aic
compute_aic<-function(log_likelihood, k){
  aic<-2 * k - 2 * log_likelihood
  return(aic)
}

# calculate bic
compute_bic<-function(log_likelihood, k, n){
  bic <- k * log(n) - 2 * log_likelihood
  return(bic)
}
# aic and bic model 1
model1_k<-length(theta_model1)
model1_aic<-compute_aic(model1_log_likelihood, model1_k)
model1_bic<-compute_bic(model1_log_likelihood, model1_k, length(target))
print("Model 1 AIC:")
## [1] "Model 1 AIC:"
print(model1_aic)
## [1] 329435.2
print("Model 1 BIC:")
## [1] "Model 1 BIC:"
print(model1_bic)
## [1] 329456.7
# aic and bic model 2
model2_k<-length(theta_model2)
model2_aic<-compute_aic(model2_log_likelihood, model2_k)
model2_bic<-compute_bic(model2_log_likelihood, model2_k, length(target))
print("Model 2 AIC:")
## [1] "Model 2 AIC:"
print(model2_aic)
## [1] 328874.7
print("Model 2 BIC:")
## [1] "Model 2 BIC:"
print(model2_bic)
## [1] 328903.4
# aic and bic model 3
model3_k<-length(theta_model3)
model3_aic<-compute_aic(model3_log_likelihood, model3_k)
model3_bic<-compute_bic(model3_log_likelihood, model3_k, length(target))
print("Model 3 AIC:")
## [1] "Model 3 AIC:"
print(model3_aic)
## [1] 778317.3
print("Model 3 BIC:")
## [1] "Model 3 BIC:"
print(model3_bic)
## [1] 778345.9
# aic and bic model 4
model4_k<-length(theta_model4)
model4_aic<-compute_aic(model4_log_likelihood, model4_k)
model4_bic<-compute_bic(model4_log_likelihood, model4_k, length(target))
print("Model 4 AIC:")
## [1] "Model 4 AIC:"
print(model4_aic)
## [1] 704040.4
print("Model 4 BIC:")
## [1] "Model 4 BIC:"
print(model4_bic)
## [1] 704069.1
# aic and bic model 5
model5_k<-length(theta_model5)
model5_aic<-compute_aic(model5_log_likelihood, model5_k)
model5_bic<-compute_bic(model5_log_likelihood, model5_k, length(target))
print("Model 5 AIC:")
## [1] "Model 5 AIC:"
print(model5_aic)
## [1] 271703.8
print("Model 5 BIC:")
## [1] "Model 5 BIC:"
print(model5_bic)
## [1] 271732.4

Task 2.5: Distributin of model prediction errors

compute_model_prediction_error<-function(X, y, theta, model){
  y_pred<-model(X, theta)
  prediction_error<-y - y_pred
  return(prediction_error)
}
# QQ plot for model 1 distribution
prediction_error_model1 <- compute_model_prediction_error(X_model1, target, theta_model1, model1)
qqnorm(prediction_error_model1, main = "Q-Q Plot of Prediction Error for Model 1")
qqline(prediction_error_model1)  

# QQ plot for model 2 distribution
prediction_error_model2 <- compute_model_prediction_error(X_model2, target, theta_model2, model2)
qqnorm(prediction_error_model2, main = "Q-Q Plot of Prediction Error for Model 2")
qqline(prediction_error_model2)  

# QQ plot for model 3 distribution
prediction_error_model3 <- compute_model_prediction_error(X_model3, target, theta_model3, model3)
qqnorm(prediction_error_model3, main = "Q-Q Plot of Prediction Error for Model 3")
qqline(prediction_error_model3)  

# QQ plot for model 4 distribution
prediction_error_model4 <- compute_model_prediction_error(X_model4, target, theta_model4, model4)
qqnorm(prediction_error_model4, main = "Q-Q Plot of Prediction Error for Model 4")
qqline(prediction_error_model4)  

# QQ plot for model 5 distribution
prediction_error_model5 <- compute_model_prediction_error(X_model5, target, theta_model5, model5)
qqnorm(prediction_error_model5, main = "Q-Q Plot of Prediction Error for Model 5")
qqline(prediction_error_model5)  

Task 2.7 train Test Split

set.seed(42)  # for reproducibility

# Create the full design matrix for Model 5
X_model5 <- data.frame(
  x4 = X$x4,
  x1 = X$x1^2,
  x3 = X$x3^2
)
y <- target

# --- 1. Split into training and testing sets (70/30) ---
n <- nrow(X_model5)
train_idx <- sample(1:n, size = round(0.7 * n))
test_idx <- setdiff(1:n, train_idx)

X_train <- X_model5[train_idx, ]
y_train <- y[train_idx, , drop = FALSE]
X_test <- X_model5[test_idx, ]
y_test <- y[test_idx, , drop = FALSE]

# --- 2. Estimate theta on training data ---
library(MASS)

theta_ridge <- function(X, y, lambda = 1e-1) {
  X_bias <- cbind(1, as.matrix(X))
  XtX <- t(X_bias) %*% X_bias
  XtX_reg <- XtX + lambda * diag(c(0, rep(1, ncol(X))))
  theta <- ginv(XtX_reg) %*% t(X_bias) %*% y
  return(theta)
}

theta_hat <- theta_ridge(X_train, y_train)

# --- 3. Make predictions on the test set ---
X_test_bias <- cbind(1, as.matrix(X_test))
y_pred <- X_test_bias %*% theta_hat

# --- 4. Compute 95% confidence intervals ---
# Residual standard error from training data
X_train_bias <- cbind(1, as.matrix(X_train))
y_train_pred <- X_train_bias %*% theta_hat
residuals_train <- y_train - y_train_pred
rss <- sum(residuals_train^2)
df <- nrow(X_train) - length(theta_hat)
sigma2 <- rss / df

# Standard errors of predictions
XtX_inv <- ginv(t(X_train_bias) %*% X_train_bias + 1e-1 * diag(c(0, rep(1, ncol(X_train)))))
se_pred <- sqrt(diag(X_test_bias %*% XtX_inv %*% t(X_test_bias)) * sigma2)

# 95% confidence intervals
lower_bound <- y_pred - 1.96 * se_pred
upper_bound <- y_pred + 1.96 * se_pred

# --- 5. Plot predictions, confidence intervals, and actual values ---
plot(y_test, type = 'p', col = 'black', pch = 16,
     ylab = "Target", xlab = "Test Sample Index", main = "Model 5 Prediction with 95% CI")
points(y_pred, col = 'blue', pch = 17)
arrows(1:length(y_pred), lower_bound, 1:length(y_pred), upper_bound, 
       length = 0.05, angle = 90, code = 3, col = 'red')

legend("topleft", legend = c("True y", "Predicted y", "95% CI"), 
       col = c("black", "blue", "red"), pch = c(16, 17, NA), lty = c(NA, NA, 1))

install.packages("gridExtra")
## Installing package into '/home/meutsabdahal/R/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
set.seed(42)

# --- 1) Prepare ---

# Your estimated coefficients from Task 2 (ridge regression)
theta_hat_vec <- as.vector(theta_hat)  # includes intercept + 3 params
names(theta_hat_vec) <- c("intercept", colnames(X_train))

# Find indices of 2 largest absolute coefficients (excluding intercept)
abs_coefs <- abs(theta_hat_vec[-1])  
top2_idx <- order(abs_coefs, decreasing = TRUE)[1:2]
param_names <- names(abs_coefs)[top2_idx]

cat("Selected parameters for ABC:", param_names, "\n")
## Selected parameters for ABC: x4 x1
# Fix other parameters at estimated values
fixed_params <- theta_hat_vec[-c(1, top2_idx + 1)]  # exclude intercept and top2

# --- 2) Define simulation function based on model ---

simulate_y <- function(X_data, theta_vec, sigma_sq) {
  # theta_vec = c(intercept, coef_x4, coef_x1_squared, coef_x3_squared)
  mu <- theta_vec[1] + 
        theta_vec[2] * X_data$x4 + 
        theta_vec[3] * X_data$x1 + 
        theta_vec[4] * X_data$x3
  y_sim <- rnorm(nrow(X_data), mean = mu, sd = sqrt(sigma_sq))
  return(y_sim)
}

# --- 3) Define prior sampler for the 2 params ---

# Get estimated values for the two parameters
theta_1_hat <- theta_hat_vec[top2_idx[1] + 1]  # +1 for intercept offset
theta_2_hat <- theta_hat_vec[top2_idx[2] + 1]

# Define ±10% prior range around estimated values
prior_range_1 <- c(theta_1_hat * 0.9, theta_1_hat * 1.1)
prior_range_2 <- c(theta_2_hat * 0.9, theta_2_hat * 1.1)

# Use estimated sigma_sq from training residuals
sigma_sq_hat <- sigma2

prior_sampler <- function() {
  theta_1 <- runif(1, min = min(prior_range_1), max = max(prior_range_1))
  theta_2 <- runif(1, min = min(prior_range_2), max = max(prior_range_2))
  list(theta_1 = theta_1, theta_2 = theta_2, sigma_sq = sigma_sq_hat)
}

# --- 4) Calculate summary statistics ---

calc_summary_stats <- function(y_vec) {
  c(mean = mean(y_vec), var = var(y_vec))
}

S_obs <- calc_summary_stats(y_train)

# --- 5) Distance metric ---

distance_metric <- function(S_sim, S_obs) {
  sqrt(sum((S_sim - S_obs)^2))
}

# --- 6) ABC rejection ---

N_sim <- 10000  # number of simulations (increase for better posterior)

pilot_runs <- 1000  # for epsilon selection
pilot_distances <- numeric(pilot_runs)

cat("Pilot run to select epsilon...\n")
## Pilot run to select epsilon...
for (i in 1:pilot_runs) {
  sample_params <- prior_sampler()
  
  # Build full theta vector: intercept + top2 params + fixed params
  theta_full <- numeric(length(theta_hat_vec))
  theta_full[1] <- theta_hat_vec[1]  # intercept fixed
  theta_full[top2_idx[1] + 1] <- sample_params$theta_1
  theta_full[top2_idx[2] + 1] <- sample_params$theta_2
  
  # fixed params fill others
  fixed_names <- names(fixed_params)
  for (name in fixed_names) {
    pos <- which(names(theta_hat_vec) == name)
    theta_full[pos] <- fixed_params[name]
  }
  
  y_sim <- simulate_y(X_train, theta_full, sample_params$sigma_sq)
  S_sim <- calc_summary_stats(y_sim)
  pilot_distances[i] <- distance_metric(S_sim, S_obs)
}

finite_dists <- pilot_distances[is.finite(pilot_distances)]
if (length(finite_dists) == 0) stop("No finite distances in pilot run!")

epsilon <- quantile(finite_dists, 0.05)
cat("Chosen epsilon (5th percentile):", epsilon, "\n")
## Chosen epsilon (5th percentile): 10.56309
# --- 7) Rejection ABC ---

accepted_thetas <- matrix(NA, nrow = N_sim, ncol = 2)
accepted_count <- 0

cat("Starting ABC rejection sampling...\n")
## Starting ABC rejection sampling...
pb <- txtProgressBar(min = 0, max = N_sim, style = 3)
##   |                                                                              |                                                                      |   0%
for (i in 1:N_sim) {
  sample_params <- prior_sampler()
  
  theta_full <- numeric(length(theta_hat_vec))
  theta_full[1] <- theta_hat_vec[1]
  theta_full[top2_idx[1] + 1] <- sample_params$theta_1
  theta_full[top2_idx[2] + 1] <- sample_params$theta_2
  for (name in fixed_names) {
    pos <- which(names(theta_hat_vec) == name)
    theta_full[pos] <- fixed_params[name]
  }
  
  y_sim <- simulate_y(X_train, theta_full, sample_params$sigma_sq)
  S_sim <- calc_summary_stats(y_sim)
  dist <- distance_metric(S_sim, S_obs)
  
  if (is.finite(dist) && dist < epsilon) {
    accepted_count <- accepted_count + 1
    accepted_thetas[accepted_count, ] <- c(sample_params$theta_1, sample_params$theta_2)
  }
  
  setTxtProgressBar(pb, i)
}
##   |                                                                              |                                                                      |   1%  |                                                                              |=                                                                     |   1%  |                                                                              |=                                                                     |   2%  |                                                                              |==                                                                    |   2%  |                                                                              |==                                                                    |   3%  |                                                                              |==                                                                    |   4%  |                                                                              |===                                                                   |   4%  |                                                                              |===                                                                   |   5%  |                                                                              |====                                                                  |   5%  |                                                                              |====                                                                  |   6%  |                                                                              |=====                                                                 |   6%  |                                                                              |=====                                                                 |   7%  |                                                                              |=====                                                                 |   8%  |                                                                              |======                                                                |   8%  |                                                                              |======                                                                |   9%  |                                                                              |=======                                                               |   9%  |                                                                              |=======                                                               |  10%  |                                                                              |=======                                                               |  11%  |                                                                              |========                                                              |  11%  |                                                                              |========                                                              |  12%  |                                                                              |=========                                                             |  12%  |                                                                              |=========                                                             |  13%  |                                                                              |=========                                                             |  14%  |                                                                              |==========                                                            |  14%  |                                                                              |==========                                                            |  15%  |                                                                              |===========                                                           |  15%  |                                                                              |===========                                                           |  16%  |                                                                              |============                                                          |  16%  |                                                                              |============                                                          |  17%  |                                                                              |============                                                          |  18%  |                                                                              |=============                                                         |  18%  |                                                                              |=============                                                         |  19%  |                                                                              |==============                                                        |  19%  |                                                                              |==============                                                        |  20%  |                                                                              |==============                                                        |  21%  |                                                                              |===============                                                       |  21%  |                                                                              |===============                                                       |  22%  |                                                                              |================                                                      |  22%  |                                                                              |================                                                      |  23%  |                                                                              |================                                                      |  24%  |                                                                              |=================                                                     |  24%  |                                                                              |=================                                                     |  25%  |                                                                              |==================                                                    |  25%  |                                                                              |==================                                                    |  26%  |                                                                              |===================                                                   |  26%  |                                                                              |===================                                                   |  27%  |                                                                              |===================                                                   |  28%  |                                                                              |====================                                                  |  28%  |                                                                              |====================                                                  |  29%  |                                                                              |=====================                                                 |  29%  |                                                                              |=====================                                                 |  30%  |                                                                              |=====================                                                 |  31%  |                                                                              |======================                                                |  31%  |                                                                              |======================                                                |  32%  |                                                                              |=======================                                               |  32%  |                                                                              |=======================                                               |  33%  |                                                                              |=======================                                               |  34%  |                                                                              |========================                                              |  34%  |                                                                              |========================                                              |  35%  |                                                                              |=========================                                             |  35%  |                                                                              |=========================                                             |  36%  |                                                                              |==========================                                            |  36%  |                                                                              |==========================                                            |  37%  |                                                                              |==========================                                            |  38%  |                                                                              |===========================                                           |  38%  |                                                                              |===========================                                           |  39%  |                                                                              |============================                                          |  39%  |                                                                              |============================                                          |  40%  |                                                                              |============================                                          |  41%  |                                                                              |=============================                                         |  41%  |                                                                              |=============================                                         |  42%  |                                                                              |==============================                                        |  42%  |                                                                              |==============================                                        |  43%  |                                                                              |==============================                                        |  44%  |                                                                              |===============================                                       |  44%  |                                                                              |===============================                                       |  45%  |                                                                              |================================                                      |  45%  |                                                                              |================================                                      |  46%  |                                                                              |=================================                                     |  46%  |                                                                              |=================================                                     |  47%  |                                                                              |=================================                                     |  48%  |                                                                              |==================================                                    |  48%  |                                                                              |==================================                                    |  49%  |                                                                              |===================================                                   |  49%  |                                                                              |===================================                                   |  50%  |                                                                              |===================================                                   |  51%  |                                                                              |====================================                                  |  51%  |                                                                              |====================================                                  |  52%  |                                                                              |=====================================                                 |  52%  |                                                                              |=====================================                                 |  53%  |                                                                              |=====================================                                 |  54%  |                                                                              |======================================                                |  54%  |                                                                              |======================================                                |  55%  |                                                                              |=======================================                               |  55%  |                                                                              |=======================================                               |  56%  |                                                                              |========================================                              |  56%  |                                                                              |========================================                              |  57%  |                                                                              |========================================                              |  58%  |                                                                              |=========================================                             |  58%  |                                                                              |=========================================                             |  59%  |                                                                              |==========================================                            |  59%  |                                                                              |==========================================                            |  60%  |                                                                              |==========================================                            |  61%  |                                                                              |===========================================                           |  61%  |                                                                              |===========================================                           |  62%  |                                                                              |============================================                          |  62%  |                                                                              |============================================                          |  63%  |                                                                              |============================================                          |  64%  |                                                                              |=============================================                         |  64%  |                                                                              |=============================================                         |  65%  |                                                                              |==============================================                        |  65%  |                                                                              |==============================================                        |  66%  |                                                                              |===============================================                       |  66%  |                                                                              |===============================================                       |  67%  |                                                                              |===============================================                       |  68%  |                                                                              |================================================                      |  68%  |                                                                              |================================================                      |  69%  |                                                                              |=================================================                     |  69%  |                                                                              |=================================================                     |  70%  |                                                                              |=================================================                     |  71%  |                                                                              |==================================================                    |  71%  |                                                                              |==================================================                    |  72%  |                                                                              |===================================================                   |  72%  |                                                                              |===================================================                   |  73%  |                                                                              |===================================================                   |  74%  |                                                                              |====================================================                  |  74%  |                                                                              |====================================================                  |  75%  |                                                                              |=====================================================                 |  75%  |                                                                              |=====================================================                 |  76%  |                                                                              |======================================================                |  76%  |                                                                              |======================================================                |  77%  |                                                                              |======================================================                |  78%  |                                                                              |=======================================================               |  78%  |                                                                              |=======================================================               |  79%  |                                                                              |========================================================              |  79%  |                                                                              |========================================================              |  80%  |                                                                              |========================================================              |  81%  |                                                                              |=========================================================             |  81%  |                                                                              |=========================================================             |  82%  |                                                                              |==========================================================            |  82%  |                                                                              |==========================================================            |  83%  |                                                                              |==========================================================            |  84%  |                                                                              |===========================================================           |  84%  |                                                                              |===========================================================           |  85%  |                                                                              |============================================================          |  85%  |                                                                              |============================================================          |  86%  |                                                                              |=============================================================         |  86%  |                                                                              |=============================================================         |  87%  |                                                                              |=============================================================         |  88%  |                                                                              |==============================================================        |  88%  |                                                                              |==============================================================        |  89%  |                                                                              |===============================================================       |  89%  |                                                                              |===============================================================       |  90%  |                                                                              |===============================================================       |  91%  |                                                                              |================================================================      |  91%  |                                                                              |================================================================      |  92%  |                                                                              |=================================================================     |  92%  |                                                                              |=================================================================     |  93%  |                                                                              |=================================================================     |  94%  |                                                                              |==================================================================    |  94%  |                                                                              |==================================================================    |  95%  |                                                                              |===================================================================   |  95%  |                                                                              |===================================================================   |  96%  |                                                                              |====================================================================  |  96%  |                                                                              |====================================================================  |  97%  |                                                                              |====================================================================  |  98%  |                                                                              |===================================================================== |  98%  |                                                                              |===================================================================== |  99%  |                                                                              |======================================================================|  99%  |                                                                              |======================================================================| 100%
close(pb)
accepted_thetas <- accepted_thetas[1:accepted_count, , drop = FALSE]
cat("Number of accepted samples:", accepted_count, "\n")
## Number of accepted samples: 589
# --- 8) Plot results ---

library(ggplot2)
library(gridExtra)

df_post <- data.frame(
  param1 = accepted_thetas[,1],
  param2 = accepted_thetas[,2]
)

p1 <- ggplot(df_post, aes(x = param1)) +
  geom_histogram(aes(y=..density..), bins = 30, fill = "skyblue", color = "black") +
  geom_vline(xintercept = theta_1_hat, color = "red", linetype = "dashed") +
  labs(title = paste("Posterior of", param_names[1]), x = param_names[1])

p2 <- ggplot(df_post, aes(x = param2)) +
  geom_histogram(aes(y=..density..), bins = 30, fill = "lightgreen", color = "black") +
  geom_vline(xintercept = theta_2_hat, color = "red", linetype = "dashed") +
  labs(title = paste("Posterior of", param_names[2]), x = param_names[2])

p_joint <- ggplot(df_post, aes(x = param1, y = param2)) +
  geom_point(alpha = 0.4, color = "blue") +
  geom_vline(xintercept = theta_1_hat, linetype = "dashed", color = "red") +
  geom_hline(yintercept = theta_2_hat, linetype = "dashed", color = "red") +
  labs(title = "Joint Posterior Distribution", x = param_names[1], y = param_names[2])

grid.arrange(p1, p2, p_joint, ncol = 2)
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.